This script uses a combination of the DEP-package and a limma-script that can handle paired samples (LA and RA from same horse). The DEP-package uses FDR-tools to generate q-values, which I have included in my own limma-script.
if (!require("pacman")) install.packages("pacman")
## Indlæser krævet pakke: pacman
pacman::p_load("edgeR", "readr", "readxl", "biomaRt", "magrittr", "tibble", "stringr",
"ggplot2", "data.table", "patchwork", "openxlsx", "dplyr", "missForest",
"RColorBrewer", "limma", "DEqMS", "preprocessCore", "DEP",
"SummarizedExperiment", "Metrics", "fdrtool", "aamisc", "sva")
# install aamisc package for MDS and Volcano plots
# Commented out for future use:
# pacman::p_load("qvalue", "rain", "limma", "devtools")
# url <- "https://cran.r-project.org/src/contrib/Archive/HarmonicRegression/HarmonicRegression_1.0.tar.gz"
# pkgFile <- "HarmonicRegression_1.0.tar.gz"
# download.file(url = url, destfile = pkgFile)
# install.packages(pkgs=pkgFile, type="source", repos=NULL)
# file.remove(pkgFile)
# pacman::p_load_gh("altintasali/aamisc")
# Colours for publication
publication_colors <- c("Control" = "#285291", "Metformin" = "#7C1516", "Sham" = "#9B9B9B")
# Load count data from a local file and metadata through Excel
# Define file paths
count_file <- "../../../data/count/report.unique_genes_matrix.tsv"
meta_file <- "../../../data/metadata/meta_proteomics.xlsx"
geneinfo_file <- "../../../data/gene_annotation/horse_gene_annotation.tsv.gz"
# Read count data
count <- readr::read_delim(count_file)
## Rows: 2967 Columns: 74
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Genes
## dbl (73): D:\Mass_spectrometry\Raw_data\Joakim\Simon Horse second run2\1A_RA...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read metadata
meta <- readxl::read_excel(meta_file)
# Remove unnecessary or blank columns from MS machine
count <- count %>% select(-matches("D:\\\\Mass_spectrometry\\\\Raw_data\\\\Joakim\\\\Simon Horse second run2\\\\10A_RA11_1_24830.d"))
# Read and clean gene annotation file
geneinfo <- fread(geneinfo_file)
setnames(geneinfo, old = names(geneinfo), new = c("ENSEMBL", "ENSEMBLv", "Description_detailed",
"Chr", "Start", "End", "Strand", "GENENAME", "ENTREZID", "Description"))
geneinfo <- geneinfo %>% select(-ENSEMBLv, -Description_detailed) %>% distinct(ENSEMBL, .keep_all = TRUE)
# Merge gene annotation with count data
annot <- merge(count[, "Genes", drop = FALSE], geneinfo, by.x = "Genes", by.y = "GENENAME", all.x = TRUE)
# Clean count data
count <- count %>% remove_rownames() %>% column_to_rownames(var="Genes")
cleaned_names <- gsub("D:\\\\Mass_spectrometry\\\\Raw_data\\\\Joakim\\\\Simon Horse second run2\\\\|_.*", "", colnames(count))
colnames(count) <- cleaned_names
meta$`Sample ID` <- cleaned_names
#subset data to only RA terminal and baseline samples
meta <- meta[meta$Region %in% c("RA"), ]
count <- count[, colnames(count) %in% meta$`Sample ID`]
# Remove low count samples from Count and Meta
samples_to_remove <- c("25C", "12A", "22A", "19A")
count <- count[, !colnames(count) %in% samples_to_remove]
meta <- meta %>% filter(!`Sample ID` %in% samples_to_remove)
# Calculate the number of proteins in each sample
num_proteins <- colSums(!is.na(count))
num_proteins <- num_proteins[meta$`Sample ID`] # Ensure matching order with meta
# Define color scheme for groups
KUalt <- c("#7C1516", "#285291", "#434343", "#999999") # Custom color palette
group_colors <- setNames(KUalt[1:length(unique(meta$Group))], unique(meta$Group))
bar_colors <- group_colors[meta$Group]
# Plot the number of proteins in each sample before filtering
par(mfrow = c(1, 1))
barplot(num_proteins,
main = "Number of Proteins in Each Sample\nBefore Filtering",
xlab = "Sample",
ylab = "Number of Proteins",
col = bar_colors,
border = "black",
ylim = c(0, 2500),
las = 2)
# Print the total number of proteins before filtering
cat("The total number of proteins before filtering was", max(num_proteins), "\n")
## The total number of proteins before filtering was 2610
# Identify genes with less than three valid values in any condition
invalid_genes <- unique(unlist(lapply(unique(meta$Condition), function(condition) {
condition_columns <- meta$`Sample ID`[meta$Condition == condition]
condition_count <- count[, condition_columns, drop = FALSE]
rownames(condition_count)[rowSums(!is.na(condition_count)) < 3]
})))
# Remove invalid genes from the original count table
filtered_count <- count[!rownames(count) %in% invalid_genes, ]
# Calculate the number of proteins in each sample after filtering
num_proteins_after <- colSums(!is.na(filtered_count))
num_proteins_after <- num_proteins_after[meta$`Sample ID`] # Ensure matching order with meta
# Plot settings
par(mfrow = c(1, 2))
# First plot: Number of proteins in each sample before filtering
barplot(num_proteins,
main = "Number of Proteins in Each Sample\nBefore Filtering",
xlab = "Sample",
ylab = "Number of Proteins",
col = bar_colors,
border = "black",
ylim = c(0, 2500),
las = 2)
# Second plot: Number of proteins in each sample after filtering
barplot(num_proteins_after,
main = "Number of Proteins in Each Sample\nAfter Filtering",
xlab = "Sample",
ylab = "Number of Proteins",
col = bar_colors,
border = "black",
ylim = c(0, 2500),
las = 2)
# Print the total and average number of proteins before and after filtering
cat("The total number of proteins before filtering was", max(num_proteins), "\n")
## The total number of proteins before filtering was 2610
cat("The average number of proteins before filtering was", mean(num_proteins), "\n")
## The average number of proteins before filtering was 2168.578
cat("The total number of proteins after filtering was", max(num_proteins_after), "\n")
## The total number of proteins after filtering was 1288
cat("The average number of proteins after filtering was", mean(num_proteins_after), "\n")
## The average number of proteins after filtering was 1258.933
# Generate proxy columns similar to DEP's sample data to make the structure compatible.
proxy_column_names <- c("Protein.IDs", "Majority.protein.IDs", "Protein.names", "Gene.names",
"Fasta.headers", "Peptides", "Razor...unique.peptides", "Unique.peptides",
"Only.identified.by.site", "Reverse", "Potential.contaminant")
proxy_df <- data.frame(matrix(NA, nrow=nrow(filtered_count), ncol=length(proxy_column_names)))
colnames(proxy_df) <- proxy_column_names
# Assign row names to specific columns
rownames_to_columns <- rownames(filtered_count)
proxy_df[1:4] <- lapply(1:4, function(i) rownames_to_columns)
# Merge and rename sample columns
merged_df <- cbind(proxy_df, filtered_count)
colnames(merged_df)[12:ncol(merged_df)] <- paste0("LFQ.intensity.", colnames(filtered_count))
# Reorder columns
lfq_columns <- grep("^LFQ.intensity", colnames(merged_df), value = TRUE)
new_order <- c(proxy_column_names[1:8], lfq_columns, proxy_column_names[9:11])
data <- merged_df[, new_order]
print(colnames(data))
## [1] "Protein.IDs" "Majority.protein.IDs"
## [3] "Protein.names" "Gene.names"
## [5] "Fasta.headers" "Peptides"
## [7] "Razor...unique.peptides" "Unique.peptides"
## [9] "LFQ.intensity.1A" "LFQ.intensity.1B"
## [11] "LFQ.intensity.2A" "LFQ.intensity.2B"
## [13] "LFQ.intensity.3A" "LFQ.intensity.3B"
## [15] "LFQ.intensity.4A" "LFQ.intensity.4B"
## [17] "LFQ.intensity.5A" "LFQ.intensity.5B"
## [19] "LFQ.intensity.6A" "LFQ.intensity.6B"
## [21] "LFQ.intensity.7A" "LFQ.intensity.7B"
## [23] "LFQ.intensity.8A" "LFQ.intensity.8B"
## [25] "LFQ.intensity.9A" "LFQ.intensity.9B"
## [27] "LFQ.intensity.10A" "LFQ.intensity.10B"
## [29] "LFQ.intensity.11A" "LFQ.intensity.11B"
## [31] "LFQ.intensity.12B" "LFQ.intensity.13A"
## [33] "LFQ.intensity.13B" "LFQ.intensity.14A"
## [35] "LFQ.intensity.14B" "LFQ.intensity.15A"
## [37] "LFQ.intensity.15B" "LFQ.intensity.16A"
## [39] "LFQ.intensity.16B" "LFQ.intensity.17A"
## [41] "LFQ.intensity.17B" "LFQ.intensity.18A"
## [43] "LFQ.intensity.18B" "LFQ.intensity.19B"
## [45] "LFQ.intensity.20A" "LFQ.intensity.20B"
## [47] "LFQ.intensity.22B" "LFQ.intensity.23A"
## [49] "LFQ.intensity.23B" "LFQ.intensity.24A"
## [51] "LFQ.intensity.24B" "LFQ.intensity.25A"
## [53] "LFQ.intensity.25B" "Only.identified.by.site"
## [55] "Reverse" "Potential.contaminant"
#Are there any duplicated gene names?
data$Gene.names %>% duplicated() %>% any()
## [1] FALSE
# Make unique names using the annotation in the "Gene.names" column as primary names and the annotation in "Protein.IDs" as name for those that do not have an gene name.
data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
# Generate a SummarizedExperiment object using an experimental design
LFQ_columns <- grep("LFQ.", colnames(data_unique)) # get LFQ column numbers
# Prepare the metadata for DEP analysis
experimental_design <- data.frame(
label = colnames(filtered_count),
condition = meta$Condition,
replicate = NA # Initialize replicate column with NA
)
# Assign replicate numbers within each condition
experimental_design <- experimental_design %>%
group_by(condition) %>%
mutate(replicate = row_number()) %>%
ungroup()
# Set row names
row.names(experimental_design) <- experimental_design$label
## Warning: Setting row names on a tibble is deprecated.
# Ensure the row names of experimental_design are unique
experimental_design <- as.data.frame(experimental_design)
rownames(experimental_design) <- make.unique(rownames(experimental_design))
# Print the experimental design to verify
print(experimental_design)
## label condition replicate
## 1 1A RA_Metformin_Baseline 1
## 2 1B RA_Metformin_4months 1
## 3 2A RA_Control_Baseline 1
## 4 2B RA_Control_4months 1
## 5 3A RA_Metformin_Baseline 2
## 6 3B RA_Metformin_4months 2
## 7 4A RA_Sham_Baseline 1
## 8 4B RA_Sham_4months 1
## 9 5A RA_Metformin_Baseline 3
## 10 5B RA_Metformin_4months 3
## 11 6A RA_Control_Baseline 2
## 12 6B RA_Control_4months 2
## 13 7A RA_Metformin_Baseline 4
## 14 7B RA_Metformin_4months 4
## 15 8A RA_Control_Baseline 3
## 16 8B RA_Control_4months 3
## 17 9A RA_Metformin_Baseline 5
## 18 9B RA_Metformin_4months 5
## 19 10A RA_Control_Baseline 4
## 20 10B RA_Control_4months 4
## 21 11A RA_Metformin_Baseline 6
## 22 11B RA_Metformin_4months 6
## 23 12B RA_Control_4months 5
## 24 13A RA_Metformin_Baseline 7
## 25 13B RA_Metformin_4months 7
## 26 14A RA_Control_Baseline 5
## 27 14B RA_Control_4months 6
## 28 15A RA_Control_Baseline 6
## 29 15B RA_Control_4months 7
## 30 16A RA_Control_Baseline 7
## 31 16B RA_Control_4months 8
## 32 17A RA_Metformin_Baseline 8
## 33 17B RA_Metformin_4months 8
## 34 18A RA_Sham_Baseline 2
## 35 18B RA_Sham_4months 2
## 36 19B RA_Metformin_4months 9
## 37 20A RA_Control_Baseline 8
## 38 20B RA_Control_4months 9
## 39 22B RA_Control_4months 10
## 40 23A RA_Metformin_Baseline 9
## 41 23B RA_Metformin_4months 10
## 42 24A RA_Sham_Baseline 3
## 43 24B RA_Sham_4months 3
## 44 25A RA_Sham_Baseline 4
## 45 25B RA_Sham_4months 4
# Create the SummarizedExperiment Object
# This will allow DEP to use the data for further analysis and visualization
data_se <- make_se(data_unique, LFQ_columns, experimental_design)
# Plot a barplot of the protein identification overlap between samples
plot_frequency(data_se)
plot_numbers(data_se)
plot_coverage(data_se)
# Filter for proteins that are identified in all replicates of at least one condition #Commented out as we have already performed filtrering manually in the first step, which unlike the terminal samples, will be used here.
#data_filt <- filter_missval(data_se, thr = 0)
#plot_frequency(data_filt)
#Check un-normalized data
plot_normalization(data_se)
#VSN-normalization
data_se_norm <- normalize_vsn(data_se)
#Plot VSN-norm data
meanSdPlot(data_se_norm, rank = TRUE)
plot_normalization(data_se_norm)
###Verdict: After variance stabilisation, the median (a reasonable estimator of the standard deviation of feature level data conditional on the mean) is approximately a horizontal line.
#TODO: Data imputation may not be necessary for your case. limma can handle missing values.
# Plot a heatmap of proteins with missing values
plot_missval(data_se_norm)
# Plot intensity distributions and cumulative fraction of proteins with and without missing values
plot_detect(data_se_norm)
#The nature of the missing-ness: the proteins with missing values have low intensities on average. This data (MNAR and close to the detection limit) should be imputed by a left-censored imputation method, such as the quantile regression-based left-censored function (“QRILC”) or random draws from a left-shifted distribution (“MinProb” and “man”).
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_imp_MinProb <- impute(data_se_norm, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.4919121
# Impute missing data using random draws from a manually defined left-shifted Gaussian distribution (for MNAR)
data_imp_man <- impute( data_se_norm, fun = "man", shift = 1.8, scale = 0.3)
# Impute missing data using Quantile Regression Imputation of Left-Censored Data (It’s particularly useful when missing values are expected to be low or below a detection limit.)
data_imp_qrilc <- impute( data_se_norm, fun = "QRILC")
## Imputing along margin 2 (samples/columns).
# Plot intensity distributions before and after imputation
plot_imputation( data_se_norm, data_imp_MinProb, data_imp_man, data_imp_qrilc)
###Verdict: data_imp_Minprob seems to be addressing MNAR the best, and will be used downstream.
# Note: This analysis is intended as an extra quality control (QC) step.
# The main differential abundance analysis is performed using the `limma` package due to the more complex study design.
#Set contrasts
con <- c("RA_Control_4months_vs_RA_Sham_4months", "RA_Metformin_4months_vs_RA_Control_4months")
# Perform differential analysis using the specified contrasts - best on non-imputed data, honestly
data_diff <- test_diff(data_imp_qrilc, type = "manual", test = con)
## Tested contrasts: RA_Control_4months_vs_RA_Sham_4months, RA_Metformin_4months_vs_RA_Control_4months
# Denote significant proteins based on user defined cutoffs
dep <- add_rejections(data_diff, alpha = 0.05, lfc = log2(0))
# Generate a long data.frame
df_long <- get_df_long(dep)
df_wide <- get_df_wide(dep)
# Plot a volcano plot
plot_volcano(dep, contrast = "RA_Control_4months_vs_RA_Sham_4months", label_size = 2, add_names = TRUE)
plot_volcano(dep, contrast = "RA_Metformin_4months_vs_RA_Control_4months", label_size = 2, add_names = TRUE)
# Plot a frequency plot of significant proteins for the different conditions
plot_cond(dep)
# Generate a results table
data_results <- get_results(dep)
# Number of significant proteins
data_results %>% filter(significant) %>% nrow()
## [1] 10
# MDS plots on non-imputed data
#Log-transformation
logCounts <- filtered_count
# For PCA plot, we will remove the NAs
logCounts_noNA <- na.omit(logCounts)
# Make DGE list and define design
annot_reordered <- annot[match(rownames(logCounts_noNA), annot$Genes), ]
d <- DGEList(counts = logCounts_noNA, genes = annot_reordered, samples = meta)
design <- model.matrix(~0 + Condition, d$samples)
# Remove batch
logCounts_batchRemoved <- removeBatchEffect(logCounts_noNA,
batch = as.factor(d$samples$Horse),
design = design)
## Coefficients not estimable: batch22 batch23
## Warning: Partial NA coefficients for 769 probe(s)
meta$Group <- factor(meta$Group, levels = names(publication_colors))
# Plot MDS without batch removal
mds_no_batch <- plotMDS(logCounts_noNA, plot = FALSE)
dims <- list(p1 = c(1,2), p2 = c(1,3), p3 = c(2,3), p4 = c(1,4))
mds_plot_no_batch <- list()
# Without labels (No batch correction)
for (i in seq_along(dims)) {
mds_plot_no_batch[[i]] <- aamisc::ggMDS(mds = mds_no_batch,
meta = d$samples,
dim = dims[[i]],
color.by = "Group",
shape.by = "Timepoint",
legend.position = "right"
) +
scale_color_manual(values = publication_colors)
}
plot_no_batch <- patchwork::wrap_plots(mds_plot_no_batch, ncol = 2) + patchwork::plot_layout(guides = 'collect')
# With labels (No batch correction)
for (i in seq_along(dims)) {
mds_plot_no_batch[[i]] <- aamisc::ggMDS(mds = mds_no_batch,
meta = d$samples,
dim = dims[[i]],
color.by = "Group",
shape.by = "Timepoint",
legend.position = "right",
text.by = "Horse",
text.size = 1.5
) +
scale_color_manual(values = publication_colors)
}
plot_no_batch_with_labels <- patchwork::wrap_plots(mds_plot_no_batch, ncol = 2) + patchwork::plot_layout(guides = 'collect')
# Compare: Batch removed vs. no batch removed
mds_batch <- plotMDS(logCounts_batchRemoved, plot = FALSE)
mds_plot_batch <- list()
# Without labels (With batch correction)
for (i in seq_along(dims)) {
mds_plot_batch[[i]] <- aamisc::ggMDS(mds = mds_batch,
meta = d$samples,
dim = dims[[i]],
color.by = "Group",
shape.by = "Timepoint",
legend.position = "right"
) +
scale_color_manual(values = publication_colors)
}
plot_batch <- patchwork::wrap_plots(mds_plot_batch, ncol = 2) + patchwork::plot_layout(guides = 'collect')
# With labels (With batch correction)
for (i in seq_along(dims)) {
mds_plot_batch[[i]] <- aamisc::ggMDS(mds = mds_batch,
meta = d$samples,
dim = dims[[i]],
color.by = "Group",
shape.by = "Timepoint",
legend.position = "right",
text.by = "Horse",
text.size = 1.5
) +
scale_color_manual(values = publication_colors)
}
plot_batch_with_labels <- patchwork::wrap_plots(mds_plot_batch, ncol = 2) + patchwork::plot_layout(guides = 'collect')
# Display the plots
print(plot_no_batch)
print(plot_no_batch_with_labels)
print(plot_batch)
print(plot_batch_with_labels)
# Histograms will be generated for four datasets:
# 1. `normalized_unimputed`: Normalized but not imputed data.
# 2. `imputed_data_MinProb`: Data imputed using the "MinProb" method.
# 3. `imputed_data_qrilc`: Data imputed using the "QRILC" method.
# 4. `unfiltered_count`: Raw log-transformed counts after initial filtering.
# Extract the assay data from data_se_norm (normalized_unimputed)
normalized_unimputed <- as.data.frame(assay(data_se_norm))
colnames(normalized_unimputed) <- colnames(filtered_count)
rownames(normalized_unimputed) <- rownames(data_se_norm)
# Extract the assay data from data_imp_MinProb (imputed)
imputed_data_MinProb <- as.data.frame(assay(data_imp_MinProb))
colnames(imputed_data_MinProb) <- colnames(filtered_count)
rownames(imputed_data_MinProb) <- rownames(data_se_norm)
# Extract the assay data from data_imp_qrilc
imputed_data_qrilc <- as.data.frame(assay(data_imp_qrilc))
colnames(imputed_data_qrilc) <- colnames(filtered_count)
rownames(imputed_data_qrilc) <- rownames(data_se_norm)
# Extract the assay data from data_imp_man
imputed_data_man <- as.data.frame(assay(data_imp_man))
colnames(imputed_data_man) <- colnames(filtered_count)
rownames(imputed_data_man) <- rownames(data_se_norm)
# Plot histogram for normalized data
par(mfrow = c(1, 4))
hist(as.vector(as.matrix(normalized_unimputed)), breaks = 50, main = "Normalized Data", xlab = "Intensity")
hist(as.vector(as.matrix(imputed_data_MinProb)), breaks = 50, main = "MinProb Imputed Data", xlab = "Intensity")
hist(as.vector(as.matrix(imputed_data_qrilc)), breaks = 50, main = "QRILC Imputed Data", xlab = "Intensity")
hist(as.vector(as.matrix(filtered_count)), breaks = 50, main = "Unfiltered Data", xlab = "Intensity")
# Define the output folder path
output_folder <- "../../../../Timecourse/analysis/01_dge/output/"
# Function to save a matrix with row names as a separate column
save_matrix <- function(matrix, file_path) {
# Create a data frame from the matrix, including row names as a column
matrix_df <- as.data.frame(matrix)
matrix_df$GeneName <- rownames(matrix)
fwrite(matrix_df, file = file_path, sep = "\t", row.names = FALSE)
}
# Function to load a matrix and restore row names
load_matrix <- function(file_path) {
matrix_df <- fread(file_path, data.table = FALSE)
rownames(matrix_df) <- matrix_df$GeneName
matrix_df$GeneName <- NULL
return(as.matrix(matrix_df)) # Convert back to a matrix
}
# Save the matrices
# save_matrix(normalized_unimputed, paste0(output_folder, "normalized_unimputed.tsv"))
# save_matrix(imputed_data_MinProb, paste0(output_folder, "imputed_data_MinProb.tsv"))
# save_matrix(imputed_data_qrilc, paste0(output_folder, "imputed_data_qrilc.tsv"))
# Load the matrices in the future
normalized_unimputed <- load_matrix(paste0(output_folder, "normalized_unimputed.tsv"))
imputed_data_MinProb <- load_matrix(paste0(output_folder, "imputed_data_MinProb.tsv"))
imputed_data_qrilc <- load_matrix(paste0(output_folder, "imputed_data_qrilc.tsv"))
# In this workflow, SVA is used to remove hidden variance, and a blocking factor
# accounts for variance introduced by repeated measures (e.g., two samples from the same horse).
# Extract biological condition information
condition <- d$samples$Condition # Specify the biological condition
# Apply SVA to Adjust for Hidden Confounders
# Full model including condition
mod <- model.matrix(~ 0 + condition, data = d$samples)
# Null model excluding biological information
mod0 <- model.matrix(~ 1, data = d$samples)
# Estimate the number of surrogate variables
n.sv <- num.sv(imputed_data_MinProb, mod, method = "be")
cat("Number of surrogate variables estimated:", n.sv, "\n")
## Number of surrogate variables estimated: 8
# Run SVA to estimate surrogate variables
svobj <- sva(imputed_data_MinProb, mod, mod0, n.sv = n.sv)
## Number of significant surrogate variables is: 8
## Iteration (out of 5 ):1 2 3 4 5
# Visualize Surrogate Variables to Ensure No Biological Information is Captured
# Prepare SV data for plotting
sv_data <- as.data.frame(svobj$sv)
colnames(sv_data) <- paste0("SV", seq_len(ncol(svobj$sv)))
sv_data <- cbind(d$samples, sv_data)
# Generate pairwise scatter plots of surrogate variables
sv_plots <- list() # Store plots
sv_cols <- colnames(sv_data)[grep("^SV", colnames(sv_data))]
for (i in seq_along(sv_cols)) {
for (j in seq_along(sv_cols)) {
if (i < j) { # Plot each pair only once
sv_plots[[paste0("SV", i, "_SV", j)]] <- ggplot(sv_data, aes_string(x = sv_cols[i], y = sv_cols[j],
color = "Group", shape = "Timepoint")) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = paste("Surrogate Variables:", sv_cols[i], "vs", sv_cols[j]),
x = paste("Surrogate Variable", i),
y = paste("Surrogate Variable", j))
}
}
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print all scatter plots for manual inspection
for (plot_name in names(sv_plots)) {
print(sv_plots[[plot_name]])
}
cat("None of the surrogate variables captured variability related to disease group.\n")
## None of the surrogate variables captured variability related to disease group.
# Incorporate Surrogate Variables into the Design Matrix
modSv <- cbind(mod, svobj$sv)
# Run duplicateCorrelation to estimate correlation between repeated samples (blocking by horse)
corfit <- duplicateCorrelation(imputed_data_MinProb, modSv, block = as.factor(d$samples$Horse))
cat("Consensus correlation for repeated measures:", corfit$consensus.correlation, "\n")
## Consensus correlation for repeated measures: 0.1004124
# Fit the linear model with limma, accounting for repeated measures
fit <- lmFit(imputed_data_MinProb, modSv, block = as.factor(d$samples$Horse), correlation = corfit$consensus.correlation)
# Ensure column names in design matrix are syntactically valid
colnames(modSv) <- make.names(colnames(modSv))
# Define contrasts with the updated column names
con <- makeContrasts(
Metformin_vs_AF_RA = conditionRA_Metformin_4months - conditionRA_Control_4months,
AF_vs_Sham_RA = conditionRA_Control_4months - conditionRA_Sham_4months,
Terminal_vs_Baseline_Control = conditionRA_Control_4months - conditionRA_Control_Baseline,
Terminal_vs_Baseline_Metformin = conditionRA_Metformin_4months - conditionRA_Metformin_Baseline,
Diff_Treatment = (conditionRA_Metformin_4months - conditionRA_Metformin_Baseline) - (conditionRA_Control_4months - conditionRA_Control_Baseline),
Diff_Disease = (conditionRA_Control_4months - conditionRA_Control_Baseline) - (conditionRA_Sham_4months - conditionRA_Sham_Baseline),
Baseline_Difference_Metf_vs_AF = conditionRA_Metformin_Baseline - conditionRA_Control_Baseline,
Baseline_Difference_AF_vs_Sham = conditionRA_Control_Baseline - conditionRA_Sham_Baseline,
levels = modSv)
con
## Contrasts
## Levels Metformin_vs_AF_RA AF_vs_Sham_RA
## conditionRA_Control_4months -1 1
## conditionRA_Control_Baseline 0 0
## conditionRA_Metformin_4months 1 0
## conditionRA_Metformin_Baseline 0 0
## conditionRA_Sham_4months 0 -1
## conditionRA_Sham_Baseline 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## Contrasts
## Levels Terminal_vs_Baseline_Control
## conditionRA_Control_4months 1
## conditionRA_Control_Baseline -1
## conditionRA_Metformin_4months 0
## conditionRA_Metformin_Baseline 0
## conditionRA_Sham_4months 0
## conditionRA_Sham_Baseline 0
## X 0
## X 0
## X 0
## X 0
## X 0
## X 0
## X 0
## X 0
## Contrasts
## Levels Terminal_vs_Baseline_Metformin Diff_Treatment
## conditionRA_Control_4months 0 -1
## conditionRA_Control_Baseline 0 1
## conditionRA_Metformin_4months 1 1
## conditionRA_Metformin_Baseline -1 -1
## conditionRA_Sham_4months 0 0
## conditionRA_Sham_Baseline 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## Contrasts
## Levels Diff_Disease Baseline_Difference_Metf_vs_AF
## conditionRA_Control_4months 1 0
## conditionRA_Control_Baseline -1 -1
## conditionRA_Metformin_4months 0 0
## conditionRA_Metformin_Baseline 0 1
## conditionRA_Sham_4months -1 0
## conditionRA_Sham_Baseline 1 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## X 0 0
## Contrasts
## Levels Baseline_Difference_AF_vs_Sham
## conditionRA_Control_4months 0
## conditionRA_Control_Baseline 1
## conditionRA_Metformin_4months 0
## conditionRA_Metformin_Baseline 0
## conditionRA_Sham_4months 0
## conditionRA_Sham_Baseline -1
## X 0
## X 0
## X 0
## X 0
## X 0
## X 0
## X 0
## X 0
# Apply contrasts and run eBayes
fit <- contrasts.fit(fit, con)
## Warning in contrasts.fit(fit, con): row names of contrasts don't match col
## names of coefficients
fit <- eBayes(fit, robust = TRUE, trend = TRUE)
# Extract DGE results using the BH method for FDR correction
res <- list() # List to store DGE results
for (i in colnames(con)) {
res_tmp <- topTable(fit, coef = i, adjust.method = "BH", number = Inf) # Get top table results
res_tmp <- res_tmp[!is.na(res_tmp$t), ] # Remove rows with NA values
res_tmp$Contrast <- i #TODO: replaced this part >>> rep(i, nrow(res_tmp)) # Store the contrast name
res[[i]] <- res_tmp # Add to the results list
# Print the number of differentially expressed genes based on adjusted p-values
n_adj_pval <- nrow(res_tmp[res_tmp$adj.P.Val < 0.05, ])
print(paste('Number of differentially expressed genes for', i, 'based on adjusted p-value (BH) =', n_adj_pval))
}
## [1] "Number of differentially expressed genes for Metformin_vs_AF_RA based on adjusted p-value (BH) = 0"
## [1] "Number of differentially expressed genes for AF_vs_Sham_RA based on adjusted p-value (BH) = 8"
## [1] "Number of differentially expressed genes for Terminal_vs_Baseline_Control based on adjusted p-value (BH) = 359"
## [1] "Number of differentially expressed genes for Terminal_vs_Baseline_Metformin based on adjusted p-value (BH) = 394"
## [1] "Number of differentially expressed genes for Diff_Treatment based on adjusted p-value (BH) = 1"
## [1] "Number of differentially expressed genes for Diff_Disease based on adjusted p-value (BH) = 10"
## [1] "Number of differentially expressed genes for Baseline_Difference_Metf_vs_AF based on adjusted p-value (BH) = 1"
## [1] "Number of differentially expressed genes for Baseline_Difference_AF_vs_Sham based on adjusted p-value (BH) = 42"
# Combine all results into a single data frame
res_all <- do.call(rbind, res)
# Map Gene Names Manually
res_all$GeneName <- sapply(seq_len(nrow(res_all)), function(i) {
gsub(paste0("^", res_all$Contrast[i], "\\."), "", rownames(res_all)[i])
})
# Split results by contrast for easier output
res_split <- split(res_all, res_all$Contrast)
# Optionally, save the results to files
# openxlsx::write.xlsx(x = res_split, file = "../../../../Timecourse/analysis/01_dge/output/dge_results_sva_combat.xlsx", asTable = TRUE)
# data.table::fwrite(x = res_all, file = "../../../../Timecourse/analysis/01_dge/output/dge_results_sva.tsv.gz", sep = "\t")
# Visualize Results with P-value Histograms
for (contrast in names(res_split)) {
p_values <- res_split[[contrast]]$P.Value
ggplot(data = data.frame(P.Value = p_values), aes(x = P.Value)) +
geom_histogram(breaks = seq(0, 1, by = 0.05), color = "black", fill = "lightblue") +
theme_minimal() +
labs(title = paste("P-value Histogram for", contrast),
x = "P-value",
y = "Frequency") +
xlim(0, 1) # Set x-axis limits
print(last_plot())
}
volcano_plots <- list()
for (i in names(res)){
volcano_plots[[i]] <- ggVolcano(x = res[[i]],
fdr = 0.05,
fdr.column = "adj.P.Val",
pvalue.column = "P.Value",
logFC = 0,
logFC.column = "logFC",
text.size = 2) +
theme_bw(base_size = 10) +
ggtitle(i)
}
## Warning in max(x[get(fdr.column) <= .fdr][, get(pvalue.column)]): no
## non-missing arguments to max; returning -Inf
patchwork::wrap_plots(volcano_plots, ncol = 3)
library(ggrepel)
# Create a named vector for ENSEMBL to Genes mapping
ensembl_to_Genes <- setNames(annot_reordered$Genes, annot_reordered$ENSEMBL)
# Map ENSEMBL IDs to Gene Names in the `res` list
for (contrast_name in names(res)) {
# Ensure the dataframe has ENSEMBL IDs as rownames
if (!"ENSEMBL" %in% colnames(res[[contrast_name]])) {
res[[contrast_name]]$ENSEMBL <- rownames(res[[contrast_name]])
}
# Map Gene Names using the annotation
res[[contrast_name]]$Genes <- ensembl_to_Genes[res[[contrast_name]]$ENSEMBL]
# Replace NA values in Genes with ENSEMBL IDs (to ensure plotting works even if some gene names are missing)
res[[contrast_name]]$Genes[is.na(res[[contrast_name]]$Genes)] <- res[[contrast_name]]$ENSEMBL[is.na(res[[contrast_name]]$Genes)]
}
# Load the volcano plot helper function
source("volcano_helpers.R")
# Create lists for volcano plots with and without labels
volcano_plots_no_labels <- list()
volcano_plots_with_labels <- list()
# Iterate over each contrast and create volcano plots
for (contrast_name in names(res)) {
# Ensure the Genes column is present for labeling
if (!"Genes" %in% colnames(res[[contrast_name]])) {
res[[contrast_name]]$Genes <- sapply(rownames(res[[contrast_name]]), function(x) gsub(".*_", "", x))
}
# Generate volcano plots using the helper function
volcano_plots <- create_custom_volcano_plot(
df = res[[contrast_name]],
logFC_col = "logFC",
pvalue_col = "P.Value",
adj_pvalue_col = "adj.P.Val",
contrast_name = contrast_name,
fc_cutoff = 0, # Set fold-change cutoff for significance
pvalue_cutoff = 0.05, # Set p-value cutoff
save_plot = TRUE,
output_path = "../output/", # Adjust output path if needed
show_labels = TRUE # Generate both labeled and unlabeled plots
)
# Store the plots
volcano_plots_no_labels[[contrast_name]] <- volcano_plots$No_Labels
volcano_plots_with_labels[[contrast_name]] <- volcano_plots$With_Labels
}
## Warning: ggrepel: 297 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 323 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Combine and display volcano plots without labels
combined_volcano_no_labels <- patchwork::wrap_plots(volcano_plots_no_labels, ncol = 3)
# Combine and display volcano plots with labels
combined_volcano_with_labels <- patchwork::wrap_plots(volcano_plots_with_labels, ncol = 3)
# Display the combined volcano plots
print(combined_volcano_no_labels)
print(combined_volcano_with_labels)
## Warning: ggrepel: 354 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 390 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Print individual volcano plots with labels for key contrasts
print(volcano_plots_with_labels[["Diff_Treatment"]])
print(volcano_plots_with_labels[["Diff_Disease"]])
# ggsave("../output/volcano_Diff_Treatment_for_figure.png", (volcano_plots_with_labels[["Diff_Treatment"]]),
# dpi = 600, width = 4, height = 3, units = "in")
# Understanding LogFC for "Diff_Treatment"
# Positive LogFC indicates:
# 1) Metformin group shows a relatively higher level compared to the control group.
# 2) This can occur if:
# a) Metformin increases and control decreases.
# b) Both increase, but metformin shows a greater increase.
# c) Both decrease, but metformin shows a smaller decrease.
# Negative LogFC indicates:
# 1) Metformin group shows a relatively lower level compared to the control group.
# 2) This can occur if:
# a) Metformin decreases and control increases.
# b) Both decrease, but metformin shows a greater decrease.
# c) Both increase, but metformin shows a smaller increase.
# Load ggpubr for enhanced visualization
library(ggpubr)
# Step 1: Define the function to create a violin plot with individual points and mean line
plot_protein_counts <- function(protein_of_interest) {
# Check if the protein is present in the dataset
if (protein_of_interest %in% rownames(data_se_norm)) {
# Extract the normalized counts for the protein
protein_counts <- assay(data_se_norm)[protein_of_interest,]
# Create a data frame for plotting
protein_df <- data.frame(
SampleID = colnames(data_se_norm), # Extract sample IDs
Count = protein_counts, # Protein counts
Condition = meta$Condition # Experimental conditions
)
# Generate violin plot with jitter points and mean line
p <- ggplot(protein_df, aes(x = Condition, y = Count, fill = Condition)) +
geom_violin(trim = FALSE, alpha = 0.5) + # Violin plot
geom_jitter(width = 0.2, size = 2, alpha = 0.7) + # Add individual points
stat_summary(fun = mean, geom = "crossbar", width = 0.5, fatten = 2, color = "red") + # Mean line
labs(
title = paste("Violin plot of normalized counts for", protein_of_interest), # Plot title
x = "Condition", y = "Normalized Abundance" # Axis labels
) +
theme_pubr() + # Apply a clean theme
scale_fill_brewer(palette = "Set3") # Color palette
print(p) # Display the plot
} else {
# Output message if protein is not found
cat("The protein", protein_of_interest, "is not present in the dataset.\n")
}
}
# Step 2: Visualize selected proteins with significant treatment effects to understand, why it is upregulated and downregulated (see comment in beginning of chunk)
# A) Electron Transport Chain Genes - negatively regulated (blue) in "Diff_Treatment"
plot_protein_counts("NDUFA6") # Associated with metformin in studies related to Aortic Aneurysms, increases less in metformin group
plot_protein_counts("ATP5F1C") # Important candidate gene for treatment effects, small decrease in metformin group
plot_protein_counts("KARS1") # Downregulated in response to treatment
# B) Proteasome Proteins - negatively regulated (blue) in "Diff_Treatment"
plot_protein_counts("PSMC5") # Protein linked to proteasome function
plot_protein_counts("PSMC4") # Another candidate for proteasome-associated mechanisms
plot_protein_counts("PSMD11") # Related to proteasome degradation processes
# C) Heat Shock Proteins (Hsp90 Family) - negatively regulated (blue) in "Diff_Treatment"
plot_protein_counts("SUGT1") # Chaperone activity protein
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
plot_protein_counts("HSP90AA1") # Hsp90 protein linked to stress response
plot_protein_counts("DYNC1H1") # Motor protein involved in cellular transport
# D) Detoxification of Reactive Oxygen Species (ROS) - positively regulated (red) in "Diff_Treatment"
plot_protein_counts("TXNRD2") # Key enzyme in ROS detoxification
plot_protein_counts("TXN") # Thioredoxin, involved in redox regulation
plot_protein_counts("SOD3") # Superoxide dismutase, a primary ROS scavenger
# Additional Proteins of Interest
plot_protein_counts("DDAH1") # Dimethylarginine Dimethylaminohydrolase 1
plot_protein_counts("COQ8A") # Coenzyme Q8 homolog
plot_protein_counts("RICTOR") # Component of mTOR complex
## The protein RICTOR is not present in the dataset.
plot_protein_counts("YWHAE") # 14-3-3 protein epsilon, signaling protein
plot_protein_counts("TXNDC5") # Protein disulfide isomerase, ROS response
plot_protein_counts("GNAI2")
# Define the function to create a violin plot with individual points and mean line
plot_protein_counts <- function(protein_of_interest) {
# Check if the protein is present in the dataset
if (protein_of_interest %in% rownames(data_se_norm)) {
# Extract the normalized counts for the protein
protein_counts <- assay(data_se_norm)[protein_of_interest,]
# Create a data frame for plotting
protein_df <- data.frame(
SampleID = colnames(data_se_norm), # Extract sample IDs
Count = protein_counts, # Protein counts
Condition = meta$Condition # Experimental conditions
)
# Generate violin plot with jitter points and mean line
p <- ggplot(protein_df, aes(x = Condition, y = Count, fill = Condition)) +
geom_violin(trim = FALSE, alpha = 0.7, color = "black") + # Violin plot with border
geom_jitter(width = 0.15, size = 1.5, alpha = 0.6, color = "black") + # Individual points
stat_summary(fun = mean, geom = "crossbar", width = 0.5, fatten = 2, color = "darkred") + # Mean line
labs(
title = paste("Normalized Protein Counts for", protein_of_interest), # Plot title
x = "Condition", y = "Normalized Abundance" # Axis labels
) +
theme_pubr() + # Apply a clean, publication-ready theme
scale_fill_brewer(palette = "Set3") + # Use a color palette with enough colors
theme(
plot.title = element_text(size = 14, face = "bold", hjust = 0.5), # Centered and bold title
axis.title = element_text(size = 12, face = "bold"), # Bold axis titles
axis.text = element_text(size = 10, color = "black"), # Custom axis text
legend.position = "none" # Remove legend
)
# Print the plot
print(p)
} else {
# Output message if the protein is not found
cat("The protein", protein_of_interest, "is not present in the dataset.\n")
}
}
# Plot the protein counts for "GNAI2"
plot_protein_counts("GNAI2")
# Transform raw count data and remove NA values
logCounts_noNA <- na.omit(logCounts)
# Create DGEList object and define the design matrix
annot_reordered <- annot[match(rownames(logCounts_noNA), annot$Genes), ]
d <- DGEList(counts = logCounts_noNA, genes = annot_reordered, samples = meta)
design <- model.matrix(~0 + Condition, data = d$samples)
# 1. MDS Plot for Raw Data
mds_raw <- plotMDS(logCounts_noNA, plot = FALSE)
mds_plot_raw <- aamisc::ggMDS(mds = mds_raw, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
scale_color_manual(values = publication_colors) +
labs(title = "MDS: Raw Data", x = "MDS Dimension 1", y = "MDS Dimension 2") +
theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))
# 2. MDS Plot for Data Adjusted by Blocking for Horse
logCounts_blocked <- removeBatchEffect(logCounts_noNA, batch = as.factor(d$samples$Horse), design = design)
## Coefficients not estimable: batch22 batch23
## Warning: Partial NA coefficients for 769 probe(s)
mds_blocked <- plotMDS(logCounts_blocked, plot = FALSE)
mds_plot_blocked <- aamisc::ggMDS(mds = mds_blocked, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
scale_color_manual(values = publication_colors) +
labs(title = "MDS: Blocked for Horse", x = "MDS Dimension 1", y = "MDS Dimension 2") +
theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))
# 3. MDS Plot for Data Adjusted Using ComBat
combat_data <- ComBat(dat = logCounts_noNA, batch = as.factor(d$samples$Batch), mod = model.matrix(~Condition, data = d$samples))
## Found4batches
## Adjusting for5covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
mds_combat <- plotMDS(combat_data, plot = FALSE)
mds_plot_combat <- aamisc::ggMDS(mds = mds_combat, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
scale_color_manual(values = publication_colors) +
labs(title = "MDS: ComBat Adjusted", x = "MDS Dimension 1", y = "MDS Dimension 2") +
theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))
# 4. MDS Plot for Data Adjusted Using SVA
mod <- model.matrix(~0 + Condition, data = d$samples)
mod0 <- model.matrix(~1, data = d$samples)
n.sv <- num.sv(logCounts_noNA, mod, method = "be")
logCounts_noNA <- as.matrix(logCounts_noNA)
storage.mode(logCounts_noNA) <- "numeric"
svobj <- sva(logCounts_noNA, mod, mod0, n.sv = n.sv)
## Number of significant surrogate variables is: 2
## Iteration (out of 5 ):1 2 3 4 5
modSv <- cbind(mod, svobj$sv)
logCounts_sva <- removeBatchEffect(logCounts_noNA, batch = d$samples$Horse, design = modSv)
## Coefficients not estimable: batch22 batch23
## Warning: Partial NA coefficients for 769 probe(s)
mds_sva <- plotMDS(logCounts_sva, plot = FALSE)
mds_plot_sva <- aamisc::ggMDS(mds = mds_sva, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
scale_color_manual(values = publication_colors) +
labs(title = "MDS: SVA Adjusted", x = "MDS Dimension 1", y = "MDS Dimension 2") +
theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))
# 5. MDS Plot for Data Adjusted Using SVA (with Labels)
mds_plot_sva_labelled <- aamisc::ggMDS(mds = mds_sva, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right", text.by = "Horse", text.size = 1.5) +
scale_color_manual(values = publication_colors) +
labs(title = "MDS: SVA Adjusted (Labelled)", x = "MDS Dimension 1", y = "MDS Dimension 2") +
theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))
# Display the plots
print(mds_plot_raw)
print(mds_plot_combat)
print(mds_plot_blocked)
print(mds_plot_sva)
print(mds_plot_sva_labelled)
sessionInfo()
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Danish_Denmark.utf8 LC_CTYPE=Danish_Denmark.utf8
## [3] LC_MONETARY=Danish_Denmark.utf8 LC_NUMERIC=C
## [5] LC_TIME=Danish_Denmark.utf8
##
## time zone: Europe/Copenhagen
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggpubr_0.6.0 ggrepel_0.9.5
## [3] sva_3.50.0 BiocParallel_1.36.0
## [5] genefilter_1.84.0 mgcv_1.9-1
## [7] nlme_3.1-163 aamisc_0.1.5
## [9] fdrtool_1.2.17 Metrics_0.1.4
## [11] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [13] GenomicRanges_1.54.1 GenomeInfoDb_1.38.8
## [15] IRanges_2.36.0 S4Vectors_0.40.2
## [17] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [19] DEP_1.24.0 preprocessCore_1.64.0
## [21] DEqMS_1.20.0 matrixStats_1.2.0
## [23] RColorBrewer_1.1-3 missForest_1.5
## [25] dplyr_1.1.4 openxlsx_4.2.5.2
## [27] patchwork_1.2.0 data.table_1.14.10
## [29] ggplot2_3.5.0 stringr_1.5.1
## [31] tibble_3.2.1 magrittr_2.0.3
## [33] biomaRt_2.58.2 readxl_1.4.3
## [35] readr_2.1.5 edgeR_4.0.16
## [37] limma_3.58.1 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.2 later_1.3.2 norm_1.0-11.1
## [4] bitops_1.0-7 filelock_1.0.3 R.oo_1.26.0
## [7] cellranger_1.1.0 XML_3.99-0.16.1 lifecycle_1.0.4
## [10] rstatix_0.7.2 doParallel_1.0.17 vroom_1.6.5
## [13] lattice_0.21-9 MASS_7.3-60 backports_1.5.0
## [16] sass_0.4.9 rmarkdown_2.27 jquerylib_0.1.4
## [19] yaml_2.3.8 httpuv_1.6.13 doRNG_1.8.6
## [22] zip_2.3.1 MsCoreUtils_1.14.1 DBI_1.2.3
## [25] abind_1.4-5 zlibbioc_1.48.0 R.utils_2.12.3
## [28] purrr_1.0.2 HarmonicRegression_1.0 itertools_0.1-3
## [31] RCurl_1.98-1.14 rappdirs_0.3.3 sandwich_3.1-0
## [34] circlize_0.4.16 GenomeInfoDbData_1.2.11 annotate_1.80.0
## [37] MSnbase_2.28.1 ncdf4_1.22 codetools_0.2-20
## [40] DelayedArray_0.28.0 DT_0.33 xml2_1.3.6
## [43] tidyselect_1.2.1 gmm_1.8 shape_1.4.6.1
## [46] farver_2.1.1 gmp_0.7-4 BiocFileCache_2.10.2
## [49] jsonlite_1.8.8 multtest_2.58.0 GetoptLong_1.0.5
## [52] survival_3.5-8 iterators_1.0.14 systemfonts_1.1.0
## [55] foreach_1.5.2 tools_4.3.2 progress_1.2.3
## [58] ragg_1.3.2 Rcpp_1.0.12 glue_1.7.0
## [61] gridExtra_2.3 SparseArray_1.2.3 xfun_0.45
## [64] qvalue_2.34.0 shinydashboard_0.7.2 withr_3.0.0
## [67] BiocManager_1.30.23 fastmap_1.1.1 fansi_1.0.6
## [70] digest_0.6.34 R6_2.5.1 mime_0.12
## [73] textshaping_0.4.0 imputeLCMD_2.1 colorspace_2.1-0
## [76] Cairo_1.6-2 RSQLite_2.3.5 R.methodsS3_1.8.2
## [79] hexbin_1.28.3 utf8_1.2.4 tidyr_1.3.0
## [82] generics_0.1.3 prettyunits_1.2.0 httr_1.4.7
## [85] htmlwidgets_1.6.4 S4Arrays_1.2.0 pkgconfig_2.0.3
## [88] NISTunits_1.0.1 gtable_0.3.5 blob_1.2.4
## [91] ComplexHeatmap_2.18.0 impute_1.76.0 XVector_0.42.0
## [94] htmltools_0.5.8.1 carData_3.0-5 rain_1.36.0
## [97] MALDIquant_1.22.2 ProtGenerics_1.34.0 clue_0.3-65
## [100] scales_1.3.0 tmvtnorm_1.6 png_0.1-8
## [103] knitr_1.47 rstudioapi_0.16.0 reshape2_1.4.4
## [106] tzdb_0.4.0 rjson_0.2.21 curl_5.2.0
## [109] cachem_1.0.8 zoo_1.8-12 GlobalOptions_0.1.2
## [112] parallel_4.3.2 AnnotationDbi_1.64.1 mzID_1.40.0
## [115] vsn_3.70.0 pillar_1.9.0 grid_4.3.2
## [118] vctrs_0.6.5 pcaMethods_1.94.0 promises_1.2.1
## [121] randomForest_4.7-1.1 car_3.1-2 dbplyr_2.5.0
## [124] xtable_1.8-4 cluster_2.1.6 evaluate_0.24.0
## [127] mvtnorm_1.2-4 cli_3.6.2 locfit_1.5-9.8
## [130] compiler_4.3.2 rlang_1.1.3 crayon_1.5.3
## [133] rngtools_1.5.2 ggsignif_0.6.4 labeling_0.4.3
## [136] affy_1.80.0 plyr_1.8.9 stringi_1.8.3
## [139] assertthat_0.2.1 munsell_0.5.1 Biostrings_2.70.1
## [142] Matrix_1.6-5 hms_1.1.3 bit64_4.0.5
## [145] KEGGREST_1.42.0 statmod_1.5.0 shiny_1.8.1.1
## [148] highr_0.11 mzR_2.36.0 broom_1.0.6
## [151] memoise_2.0.1 affyio_1.72.0 bslib_0.7.0
## [154] bit_4.0.5